import logging
from init import PATHS
# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)
import pandas as pd


def _summarise_ownership(subs, names, current_year=2020):
    """Collapse ownership links to one row per subsidiary/shareholder/year.

    Args:
        subs: Raw ownership links with the columns 'Subsidiary BvD ID',
            'Shareholder BvD ID', 'year' and 'Direct % (only figures)'.
        names: Subsidiary names with the columns 'BvD ID number' and 'NAME'.
        current_year: Year assigned to rows whose 'year' is the label
            'current'.

    Returns:
        DataFrame with the columns 'Sub_BvDID', 'OEM_BvDID', 'Year' and
        'Direct', followed by the columns of ``names`` ('NAME' renamed to
        'Sub_Name'). 'Direct' is True when at least one link in the group has
        a direct percentage above 50.
    """
    keys = ['Subsidiary BvD ID', 'Shareholder BvD ID', 'year']
    links = (
        subs.assign(Direct=subs['Direct % (only figures)'] > 50)
        .groupby(keys)['Direct']
        .any()
        .reset_index()
        .rename(columns={'Subsidiary BvD ID': 'Sub_BvDID',
                         'Shareholder BvD ID': 'OEM_BvDID',
                         'year': 'Year'})
    )
    # When the label 'current' is present the whole column is text, so plain
    # years such as '2015' would never match a numeric 'Year' in the OEM
    # table. Map the label first, then convert the column to numbers; values
    # that are not numeric become NaN and simply stay unmatched.
    links['Year'] = pd.to_numeric(
        links['Year'].replace('current', str(current_year)), errors='coerce')

    names = names.rename(columns={'BvD ID number': 'Sub_BvDID'})
    links = links.merge(names, on='Sub_BvDID', how='left')
    return links.rename(columns={'NAME': 'Sub_Name'})


def _summarise_industry(industry, naics_codes):
    """Build one industry record per subsidiary.

    The NAICS code and title are used when present; the SIC code and title
    are the fallback.

    Args:
        industry: Raw industry table (text columns) with 'BvD ID number' and
            the NAICS / US SIC primary code and description columns.
        naics_codes: Lookup table with the columns 'Code' and 'Title'.

    Returns:
        DataFrame with the columns 'Sub_BvDID', 'ind_code', 'ind_name',
        'classification', 'naics3', 'naics3_title' and 'all_codes'.
    """
    industry = industry.rename(columns={
        'BvD ID number': 'Sub_BvDID',
        'NAICS, Primary code(s)': 'naics',
        'NAICS, Primary code(s), text description': 'naics_title',
        'US SIC, Primary code(s)': 'sic',
        'US SIC primary code, text description': 'sic_title'})
    naics = industry['naics']
    sic = industry['sic']

    ## coverage:
    LOGGER.info('Industry coverage: %d rows, %d without NAICS, '
                '%d without NAICS and SIC',
                len(industry), naics.isna().sum(),
                (naics.isna() & sic.isna()).sum())

    # 'none' unless a SIC code exists; NAICS takes precedence over SIC.
    classification = pd.Series('none', index=industry.index)
    classification = classification.mask(sic.notna(), 'sic')
    classification = classification.mask(naics.notna(), 'naics')

    coded = pd.DataFrame({
        'Sub_BvDID': industry['Sub_BvDID'],
        ## keep sic code only if naics missing
        'ind_code': naics.fillna(sic),
        # The name is the NAICS *title* (not the code), else the SIC title.
        'ind_name': industry['naics_title'].fillna(industry['sic_title']),
        'classification': classification,
        # .str keeps a missing NAICS code missing; str(NaN)[0:3] would turn
        # it into the literal text 'nan'.
        'naics3': naics.str[:3],
    })

    # Blank 'Code' rows are dropped because merge matches NaN keys to each
    # other, which would attach them to every row without a NAICS code.
    titles = (
        naics_codes[['Code', 'Title']]
        .dropna(subset=['Code'])
        .rename(columns={'Code': 'naics3', 'Title': 'naics3_title'})
    )
    coded = coded.merge(titles, on='naics3', how='left')

    by_sub = coded.groupby('Sub_BvDID')
    # Columns must be selected with a list; a bare tuple of names is rejected
    # by current pandas. Note that first() takes the first non-null value of
    # each column independently.
    first_codes = by_sub[['ind_code', 'ind_name', 'classification',
                          'naics3', 'naics3_title']].first().reset_index()
    all_codes = by_sub['ind_code'].unique().rename('all_codes').reset_index()
    return first_codes.merge(all_codes, on='Sub_BvDID')


def main():
    """Join the OEM table with subsidiaries and their industry, write a CSV.

    Reads the OEM list, the subsidiary ownership links, the subsidiary names,
    the subsidiary industry codes and the NAICS code lookup from the
    locations in ``PATHS``. Every OEM row is kept (left join on 'Year' and
    the OEM BvD ID) and the result is written to 'OEM_and_Subsidiaries.csv'.
    """
    oem = pd.read_csv(
        PATHS.marklines / 'OEMs.csv',
        dtype={'docdb_family_id': str, 'bvdid': str, 'year': int},
    ).rename(columns={'Level2_bvdid': 'OEM_BvDID'})
    oem = oem.drop("Note  ", axis=1)

    raw_subs = pd.read_csv(PATHS.dropbox / 'Data_outputted/A_AutoIndustry/subsidiaries.csv')
    names = pd.read_csv(PATHS.dropbox / 'Data_outputted/A_AutoIndustry/subsidiaries_names.csv')
    subs = _summarise_ownership(raw_subs, names)

    ## add industry
    naics_codes = pd.read_csv(PATHS.other / 'NAICS_Codes.csv', dtype=object)
    raw_industry = pd.read_csv(PATHS.dropbox / 'Data_outputted/A_AutoIndustry/subsidiaries_industry.csv', dtype=str)
    industry = _summarise_industry(raw_industry, naics_codes)
    subs = subs.merge(industry, on='Sub_BvDID', how='left')

    # merge with OEM
    combined = oem.merge(subs, on=['Year', 'OEM_BvDID'], how='left')
    combined = combined.rename(columns={'OEM_BvDID': 'Level2_bvdid'})
    combined.to_csv(PATHS.dropbox / 'Data_outputted/A_AutoIndustry/OEM_and_Subsidiaries.csv', index=False)

